import json

import dateutil
from cvss import parser as cvss_parser

from dojo.models import Finding


class GithubVulnerabilityParser:

    """Parser turning GitHub security reports into ``Finding`` objects.

    ``get_findings`` accepts two JSON shapes:

    * a JSON object: a GraphQL response that has a top-level ``data`` key and,
      somewhere below it, a ``vulnerabilityAlerts`` node;
    * a JSON array of alert objects carrying ``rule`` and
      ``most_recent_instance`` entries (this looks like the output of the
      GitHub code scanning alerts REST API - confirm against the producer).
    """

    def get_scan_types(self):
        """Return the list of scan type names handled by this parser."""
        return ["Github Vulnerability Scan"]

    def get_label_for_scan_types(self, scan_type):
        """Return the display label for ``scan_type`` (the name itself)."""
        return scan_type  # no custom label for now

    def get_description_for_scan_types(self, scan_type):
        """Return a short description of the expected report."""
        return "Import vulnerabilities from Github API (GraphQL Query)"

    def get_findings(self, filename, test):
        """Parse a report and return the list of findings it contains.

        Args:
            filename: Readable file-like object containing the JSON report
                (it is handed straight to ``json.load``).
            test: Object stored as ``test`` on every created ``Finding``.

        Returns:
            A list of ``Finding`` objects.

        Raises:
            ValueError: The report is a JSON object without a ``data`` key or
                without a non-empty ``vulnerabilityAlerts`` node.
            TypeError: The report is neither a JSON object nor a JSON array.

        """
        data = json.load(filename)

        if isinstance(data, dict):
            return self._get_findings_from_graphql(data, test)
        if isinstance(data, list):
            return self._get_findings_from_alert_list(data, test)

        error_msg = (
            "Invalid report format, expected a GitHub RepositoryVulnerabilityAlert GraphQL query response."
        )
        raise TypeError(error_msg)

    def _get_findings_from_graphql(self, data, test):
        """Build findings from a GraphQL response object (dict report)."""
        if "data" not in data:
            error_msg = (
                "Invalid report format, expected a GitHub RepositoryVulnerabilityAlert GraphQL query response."
            )
            raise ValueError(error_msg)

        payload = data.get("data")
        alerts = self._search_vulnerability_alerts(payload)
        if not alerts:
            error_msg = "Invalid report, no 'vulnerabilityAlerts' node found"
            raise ValueError(error_msg)

        # ``repository`` may be absent or an explicit null; both mean "no URL".
        repo = (payload.get("repository") if isinstance(payload, dict) else None) or {}
        repo_url = repo.get("url")

        findings = []
        findings_by_id = {}
        for alert in alerts.get("nodes") or []:
            if not alert:
                # Null entries in the node list carry no data to import.
                continue
            finding = self._build_finding_from_alert(alert, repo_url, test)

            dupe_key = finding.unique_id_from_tool
            if dupe_key is None:
                # Without an id there is nothing to deduplicate on; keeping the
                # finding avoids merging unrelated alerts under the key ``None``.
                findings.append(finding)
            elif dupe_key in findings_by_id:
                findings_by_id[dupe_key].nb_occurences += 1
            else:
                findings_by_id[dupe_key] = finding
                findings.append(finding)

        return findings

    def _build_finding_from_alert(self, alert, repo_url, test):
        """Create one ``Finding`` from a single ``vulnerabilityAlerts`` node.

        Every nested lookup uses ``or <empty>`` instead of a ``.get`` default
        because a key that is present with a JSON ``null`` value bypasses the
        default and would otherwise raise ``AttributeError``/``TypeError``.
        """
        vuln = alert.get("securityVulnerability") or {}
        advisory = vuln.get("advisory") or {}
        summary = advisory.get("summary") or ""
        desc = advisory.get("description") or ""

        pull_request = (alert.get("dependabotUpdate") or {}).get("pullRequest") or {}
        pr_link = pull_request.get("permalink")
        if pr_link:
            desc = f"Fix PR: [{pr_link}]({pr_link})\n" + desc

        alert_num = alert.get("number")
        alert_link = None
        if alert_num and repo_url:
            alert_link = f"{repo_url}/security/dependabot/{alert_num}"
            desc = f"Repo Alert: [{alert_link}]({alert_link})\n" + desc

        finding = Finding(
            title=summary,
            test=test,
            description=desc,
            severity=self._convert_security(vuln.get("severity", "MODERATE")),
            static_finding=True,
            dynamic_finding=False,
            unique_id_from_tool=alert.get("id"),
        )

        if alert_link:
            finding.url = alert_link

        # Only the first CWE is used; ids look like "CWE-79", so drop 4 chars.
        cwe_nodes = (advisory.get("cwes") or {}).get("nodes") or []
        if cwe_nodes:
            cwe_id = ((cwe_nodes[0] or {}).get("cweId") or "")[4:]
            if cwe_id.isdigit():
                finding.cwe = int(cwe_id)

        manifest_path = alert.get("vulnerableManifestPath")
        if manifest_path:
            finding.file_path = manifest_path

        # An exact requirement is written "= 1.2.3"; store just the version.
        requirement = alert.get("vulnerableRequirements") or ""
        if requirement.startswith("= "):
            finding.component_version = requirement[2:]
        elif requirement:
            finding.component_version = requirement
        finding.component_name = (vuln.get("package") or {}).get("name")

        created_at = alert.get("createdAt")
        if created_at:
            finding.date = dateutil.parser.parse(created_at)
        if alert.get("state") in {"FIXED", "DISMISSED"}:
            finding.active = False
            finding.is_mitigated = True

        ref_urls = [
            ref.get("url")
            for ref in (advisory.get("references") or [])
            if ref and ref.get("url")
        ]
        if alert_link:
            ref_urls.append(alert_link)
        if pr_link:
            ref_urls.append(pr_link)
        if ref_urls:
            finding.references = "\r\n".join(ref_urls)

        ids = [
            ident.get("value")
            for ident in (advisory.get("identifiers") or [])
            if ident and ident.get("value")
        ]
        if ids:
            for identifier in ids:
                if identifier.startswith("CVE-"):
                    finding.cve = identifier
                elif identifier.startswith("GHSA-"):
                    finding.vuln_id_from_tool = identifier
            if not finding.vuln_id_from_tool:
                finding.vuln_id_from_tool = ids[0]
            finding.unsaved_vulnerability_ids = ids

        # cvss is deprecated, so we favor cvssSeverities if it exists
        for key in ("cvssSeverities", "cvss"):
            cvss = advisory.get(key) or {}
            if key == "cvssSeverities" and cvss:
                cvss = cvss.get("cvssV3") or {}
            if cvss:
                score = cvss.get("score")
                if score is not None:
                    finding.cvssv3_score = score
                vec = cvss.get("vectorString")
                if vec:
                    parsed = cvss_parser.parse_cvss_from_text(vec)
                    if parsed:
                        finding.cvssv3 = parsed[0].clean_vector()
                break

        epss = advisory.get("epss") or {}
        percentage = epss.get("percentage")
        percentile = epss.get("percentile")
        if percentage is not None:
            finding.epss_score = percentage
        if percentile is not None:
            finding.epss_percentile = percentile

        return finding

    def _get_findings_from_alert_list(self, data, test):
        """Build one finding per entry of a JSON-array report.

        Apart from ``security_severity_level``, every key read below is
        required; a missing one raises ``KeyError``.
        """
        findings = []
        for vuln in data:
            rule = vuln["rule"]
            instance = vuln["most_recent_instance"]

            url = vuln["url"]
            rule_id = rule["id"]
            rule_description = rule["description"]
            analysis_key = instance["analysis_key"]
            location = instance["location"]

            # The level may be null; NOTE(review): "Info" is the chosen
            # fallback for such alerts - confirm it is the desired severity.
            level = rule.get("security_severity_level")
            severity = level.capitalize() if level else "Info"

            description = "".join(
                [
                    f"{rule_description}\n",
                    f"**url:** {url}\n",
                    f"**html_url:** {vuln['html_url']}\n",
                    f"**ruleid:** {rule_id}\n",
                    f"**ruleseverity:** {rule['severity']}\n",
                    f"**ruledescription:** {rule_description}\n",
                    f"**rulename:** {rule['name']}\n",
                    f"**ruletags:** {rule['tags']}\n",
                    f"**most_recent_instanceref:** {instance['ref']}\n",
                    f"**most_recent_instanceanalysis_key:** {analysis_key}\n",
                    f"**most_recent_instanceenvironment:** {instance['environment']}\n",
                    f"**most_recent_instancecategory:** {instance['category']}\n",
                    f"**most_recent_instancestate:** {instance['state']}\n",
                    f"**most_recent_instancecommit_sha:** {instance['commit_sha']}\n",
                    f"**most_recent_instancemessage:** {instance['message']['text']}\n",
                    f"**location:** {location}\n",
                    f"**instancesurl:** {vuln['instances_url']}\n",
                ],
            )

            findings.append(
                Finding(
                    title=rule_id,
                    test=test,
                    description=description,
                    severity=severity,
                    active=vuln["state"] == "open",
                    static_finding=True,
                    dynamic_finding=False,
                    unique_id_from_tool=f"{rule_id}{url}{analysis_key}{location}",
                ),
            )
        return findings

    def _search_vulnerability_alerts(self, data):
        """Depth-first search for the first truthy ``vulnerabilityAlerts`` value.

        A dict that has the key returns its value directly (even when that
        value is empty); nested dicts and lists are searched recursively and
        only truthy results are propagated. Returns ``None`` when nothing is
        found or ``data`` is neither a dict nor a list.
        """
        if isinstance(data, dict):
            if "vulnerabilityAlerts" in data:
                return data["vulnerabilityAlerts"]
            children = data.values()
        elif isinstance(data, list):
            children = data
        else:
            return None

        for child in children:
            found = self._search_vulnerability_alerts(child)
            if found:
                return found
        return None

    def _convert_security(self, val):
        """Map a report severity string to the title-cased severity name.

        ``"MODERATE"`` (any casing) becomes ``"Medium"``; other values are
        title-cased. A missing/empty value is treated like the default
        ``"MODERATE"`` so a null severity does not abort the import.
        """
        if not val:
            return "Medium"
        return "Medium" if val.lower() == "moderate" else val.title()
